import os
import pickle
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
en_ewt -- Genre: blog, social, reviews, email, web
en_gum -- Genre: academic, blog, fiction, government, news, nonfiction, social, spoken, web, wiki
Acc (Wash the leaves; Got it)
То есть тут существительные и местоимения.
Neut (In other words; It is unsurprising)
Тут существительные собственные и нарицательные, местоимения.
Def (I shall leave this office; the)
Тут артикли и указательные местоимения.
Plur (Six months; See what else we got here; Let' s just double check that)
Тут существительные, местоимения, глаголы в каких то случаях кажется.
3 (всякое разное)
Тут местоимения, глагол.
Sup (On the hottest days; Most of the rooms)
Тут прилагательные, неопределенное местоимение, наречия (?).
Roman (I, II)
Числительные.
Frac (2.4) -- мало их
Числительные.
Sub (только в EWT и очень мало)
Наклонение.
Pres
Кажется тут только время глагола.
При помощи NeuroX toolkit
! Проблема -- недостаточное количество данных для некоторых меток. Возможно, отчасти из-за ограничений на длину предложения, но также потому что их просто недостаточно. Гипотетическое решение: подумать, как вытаскивать эти данные из неиспользуемых train-данных.
path = os.path.abspath('../') + '/results/'


def _load_pickle(name):
    """Load ``<results>/<name>.pkl`` and return the unpickled object.

    NOTE: ``pickle.load`` must only be used on trusted, locally produced
    files — these are our own experiment artifacts.
    """
    with open(f'{path}{name}.pkl', 'rb') as f:
        return pickle.load(f)


# Probing scores per morphological category: true task and control ("_c_") task.
scores_ewt = _load_pickle('scores_ewt')
scores_c_ewt = _load_pickle('scores_c_ewt')
scores_gum = _load_pickle('scores_gum')
scores_c_gum = _load_pickle('scores_c_gum')
scores_taiga = _load_pickle('scores_taiga')
scores_c_taiga = _load_pickle('scores_c_taiga')
# Neuron rankings per category (presumably most relevant first — produced upstream).
ordered_neurons_ewt = _load_pickle('neurons_ewt')
ordered_neurons_gum = _load_pickle('neurons_gum')
ordered_neurons_taiga = _load_pickle('neurons_taiga')
# Per-category data sizes (train size, test size, number of classes).
size_ewt = _load_pickle('size_ewt')
size_taiga = _load_pickle('size_taiga')
size_gum = _load_pickle('size_gum')
# Scores after keeping only / removing the top-ranked neurons.
scores_keep_ewt = _load_pickle('scores_keep_ewt')
scores_keep_gum = _load_pickle('scores_keep_gum')
scores_remove_ewt = _load_pickle('scores_remove_ewt')
scores_remove_gum = _load_pickle('scores_remove_gum')
def bad_scores(scores, threshold=0.5):
    """Print every per-label probing score below *threshold*.

    Args:
        scores: mapping category -> [train_scores, test_scores], where each
            element is a dict label -> accuracy (including '__OVERALL__').
        threshold: scores strictly below this value are reported
            (default 0.5, matching the original hard-coded cut-off).

    For each category and split, prints one line per low-scoring label,
    then the full score dict for that split and a separator.
    """
    for category, splits in scores.items():
        # Same reporting logic for the train and the test split.
        for split_name, split_scores in (('train', splits[0]), ('test', splits[1])):
            flagged = []
            for label, score in split_scores.items():
                if score < threshold:
                    # Keep the whole split dict at most once, as before.
                    if split_scores not in flagged:
                        flagged.append(split_scores)
                    print(category, label, f'{split_name}_score', score)
            if flagged:
                print(category, split_name, flagged)
                print('---------------------')
Genre: blog, social, reviews, email, web
bad_scores(scores_ewt)
Degree Cmp test_score 0.32051282051282054
Degree test [{'__OVERALL__': 0.7570281124497992, 'Cmp': 0.32051282051282054, 'Pos': 0.9733333333333334, 'Sup': 0.5}]
---------------------
NumType Mult test_score 0.1111111111111111
NumType Frac test_score 0.0
NumType test [{'__OVERALL__': 0.6714285714285714, 'Ord': 0.8095238095238095, 'Mult': 0.1111111111111111, 'Card': 0.68, 'Frac': 0.0}]
---------------------
PronType Rel test_score 0.17857142857142858
PronType Neg test_score 0.3333333333333333
PronType test [{'__OVERALL__': 0.7316602316602316, 'Art': 0.6696428571428571, 'Prs': 0.8660714285714286, 'Dem': 0.8214285714285714, 'Ind': 0.6774193548387096, 'Rel': 0.17857142857142858, 'Int': 0.75, 'Neg': 0.3333333333333333, 'Tot': 0.6}]
---------------------
NumForm Roman test_score 0.0
NumForm test [{'__OVERALL__': 0.7681498829039812, 'Roman': 0.0, 'Digit': 0.7833333333333333, 'Word': 0.75}]
---------------------
VerbForm Ger test_score 0.26785714285714285
VerbForm test [{'__OVERALL__': 0.6607369758576874, 'Part': 0.8266666666666667, 'Fin': 0.5111111111111111, 'Ger': 0.26785714285714285, 'Inf': 0.84}]
---------------------
Mood Sub train_score 0.0
Mood train [{'__OVERALL__': 0.989480198019802, 'Imp': 0.9845956354300385, 'Ind': 0.9987995198079231, 'Sub': 0.0}]
---------------------
Mood Sub test_score 0.0
Mood test [{'__OVERALL__': 0.9595375722543352, 'Imp': 0.9537037037037037, 'Ind': 0.9733333333333334, 'Sub': 0.0}]
---------------------
ExtPos ADP test_score 0.3333333333333333
ExtPos PRON test_score 0.0
ExtPos SCONJ test_score 0.0
ExtPos CCONJ test_score 0.25
ExtPos test [{'__OVERALL__': 0.6571428571428571, 'ADV': 0.95, 'ADP': 0.3333333333333333, 'PRON': 0.0, 'SCONJ': 0.0, 'CCONJ': 0.25}]
---------------------
def accuracy_plot(dct_acc, dct_data):
    """Show a 3-row overview: overall accuracy, data sizes, class counts.

    Args:
        dct_acc: mapping category -> [train_scores, test_scores], each a dict
            with an '__OVERALL__' accuracy key.
        dct_data: mapping category -> (train_size, test_size, num_classes);
            must have exactly the same keys, in the same order, as dct_acc.
    """
    assert list(dct_acc) == list(dct_data), 'category keys/order must match'
    d = pd.DataFrame({
        'categories': list(dct_acc),
        'train_acc': [round(v[0]['__OVERALL__'], 2) for v in dct_acc.values()],
        'test_acc': [round(v[1]['__OVERALL__'], 2) for v in dct_acc.values()],
        'train_data': [v[0] for v in dct_data.values()],
        'test_data': [v[1] for v in dct_data.values()],
        'num_classes': [v[2] for v in dct_data.values()],
    })
    fig1 = px.line(d, x='categories', y=['train_acc', 'test_acc'], template="plotly_white")
    fig2 = px.line(d, x='categories', y=['train_data', 'test_data', 'num_classes'], template="seaborn")
    fig3 = px.bar(d, x='categories', y='num_classes', template="plotly")
    fig3.update_traces(texttemplate='%{y}', textposition='outside')
    fig3.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
    fig = make_subplots(rows=3, cols=1, subplot_titles=('Accuracy', 'Data size', 'Num classes'))
    fig.add_trace(fig1['data'][0], row=1, col=1)
    fig.add_trace(fig1['data'][1], row=1, col=1)
    # NOTE(review): fig2's third trace (num_classes) is intentionally not added
    # here — num_classes gets its own bar subplot in row 3. Confirm intended.
    fig.add_trace(fig2['data'][0], row=2, col=1)
    fig.add_trace(fig2['data'][1], row=2, col=1)
    fig.add_trace(fig3['data'][0], row=3, col=1)
    fig.update_layout(height=1400, width=900, showlegend=False)
    fig.show()
accuracy_plot(scores_ewt, size_ewt)
def accuracy(dct_acc):
    """Plot overall train/test accuracy per category as a line chart.

    Args:
        dct_acc: mapping category -> [train_scores, test_scores], each a dict
            with an '__OVERALL__' accuracy key.
    """
    d = pd.DataFrame({
        'categories': list(dct_acc),
        'train_acc': [round(v[0]['__OVERALL__'], 2) for v in dct_acc.values()],
        'test_acc': [round(v[1]['__OVERALL__'], 2) for v in dct_acc.values()],
    })
    fig = px.line(d, x='categories', y=['train_acc', 'test_acc'], template="plotly_white")
    fig.show()
accuracy(scores_c_ewt)
def check_selectivity(scores):
    """Collect each category's overall test accuracy into a numpy array.

    Args:
        scores: mapping category -> [train_scores, test_scores] dicts.

    Returns:
        np.ndarray of the test '__OVERALL__' values, in key order.
    """
    overall_test = [splits[1]['__OVERALL__'] for splits in scores.values()]
    return np.array(overall_test)
# Selectivity = true-task minus control-task overall test accuracy (EWT).
selectivity = check_selectivity(scores_ewt) - check_selectivity(scores_c_ewt)
# Selectivity (difference between true-task and control-task performance):
dict(zip(list(scores_ewt.keys()), selectivity))
{'Degree': 0.3112449799196787,
'Definite': 0.25111111111111106,
'NumType': 0.0,
'PronType': 0.5926640926640927,
'NumForm': 0.11709601873536302,
'VerbForm': 0.3710292249047014,
'Number': 0.3188888888888889,
'Person': 0.48295454545454547,
'Case': 0.375,
'Mood': 0.4161849710982659,
'Tense': 0.3666666666666667,
'ExtPos': 0.2285714285714286,
'Gender': 0.5154639175257733}
Genre: academic, blog, fiction, government, news, nonfiction, social, spoken, web, wiki
bad_scores(scores_gum)
PronType Ind test_score 0.3333333333333333
PronType Rel test_score 0.12903225806451613
PronType Neg test_score 0.0
PronType test [{'__OVERALL__': 0.7245508982035929, 'Art': 0.75, 'Int': 0.8970588235294118, 'Dem': 0.875, 'Tot': 0.5869565217391305, 'Prs': 0.7410714285714286, 'Ind': 0.3333333333333333, 'Rel': 0.12903225806451613, 'Neg': 0.0}]
---------------------
Degree Sup test_score 0.37142857142857144
Degree Cmp test_score 0.16666666666666666
Degree test [{'__OVERALL__': 0.7735849056603774, 'Sup': 0.37142857142857144, 'Cmp': 0.16666666666666666, 'Pos': 0.8933333333333333}]
---------------------
NumType Mult test_score 0.16666666666666666
NumType Ord test_score 0.3387096774193548
NumType Frac test_score 0.42857142857142855
NumType test [{'__OVERALL__': 0.7452229299363057, 'Mult': 0.16666666666666666, 'Ord': 0.3387096774193548, 'Card': 0.9022222222222223, 'Frac': 0.42857142857142855}]
---------------------
Case Acc test_score 0.20238095238095238
Case test [{'__OVERALL__': 0.7517361111111112, 'Gen': 0.8333333333333334, 'Nom': 0.8533333333333334, 'Acc': 0.20238095238095238}]
---------------------
NumForm Word test_score 0.30714285714285716
NumForm Roman test_score 0.0
NumForm test [{'__OVERALL__': 0.686046511627907, 'Word': 0.30714285714285716, 'Roman': 0.0, 'Digit': 0.965}]
---------------------
# Same overview plots and selectivity computation for GUM.
accuracy_plot(scores_gum, size_gum)
accuracy(scores_c_gum)
selectivity = check_selectivity(scores_gum) - check_selectivity(scores_c_gum)
dict(zip(list(scores_gum.keys()), selectivity))
{'Definite': 0.20828105395232122,
'PronType': 0.5169660678642716,
'Number': 0.3696145124716553,
'Mood': 0.38586956521739135,
'Gender': 0.49482401656314695,
'VerbForm': 0.30736543909348435,
'Degree': 0.008086253369272267,
'NumType': 0.23248407643312097,
'Case': 0.26388888888888895,
'Tense': 0.28,
'Person': 0.5849710982658959,
'NumForm': 0.17732558139534882}
В чем суть: пока простой тупой метод посмотреть общий set(нейронов), если для каждой категории выбираем N-top нейронов в ранжировании.
Всего нейронов в BERT 9984 (13 слоев * 768 -- размерность эмбеддинга).
Всего в каждом датасете было 12 категорий.
def get_overall_common_neurons(dct, nn=(50, 100, 150, 200, 250, 300, 350, 400, 450, 500)):
    """Bar-plot total vs. unique neuron counts when pooling top-N per category.

    For each cutoff in *nn*, concatenates the top-N neurons of every category
    and compares the raw count ('all') with the de-duplicated count
    ('unique') — the gap shows how much categories share neurons.

    Args:
        dct: mapping category -> ranked list of neuron ids.
        nn: top-N cutoffs to evaluate (immutable default — a mutable list
            default would be shared across calls).
    """
    totals, uniques = [], []
    for n in nn:
        pooled = []
        for ranked in dct.values():
            pooled += ranked[:n]
        totals.append(len(pooled))
        uniques.append(len(set(pooled)))
    xcol = f'top N-neuron for all {len(dct.keys())} categories'
    d = pd.DataFrame({xcol: list(nn), 'all': totals, 'unique': uniques})
    fig = px.bar(d, x=xcol, y=['unique', 'all'], template="plotly_white", barmode='group')
    fig.update_traces(texttemplate='%{y}', textposition='outside')
    fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
    fig.update_xaxes(tick0=0, dtick=50)
    fig.show()
# Pooled top-N neuron overlap within each English dataset.
get_overall_common_neurons(ordered_neurons_ewt)
get_overall_common_neurons(ordered_neurons_gum)
def common_neurons(d1, d2, nn=(50, 100, 150, 200, 250, 300, 350, 400, 450, 500)):
    """Count, per shared category, how many top-N neurons two rankings share.

    Args:
        d1, d2: mapping category -> ranked list of neuron ids (best first).
        nn: top-N cutoffs; one row per cutoff in the result (immutable
            default — a mutable list default would be shared across calls).

    Returns:
        DataFrame indexed by the cutoffs in *nn*, one column per category
        present in both mappings (sorted), holding |top-N(d1) ∩ top-N(d2)|.
    """
    # Set intersection replaces the original O(|d1|*|d2|) nested key scan.
    shared_cats = sorted(set(d1) & set(d2))
    df = pd.DataFrame(0, index=list(nn), columns=shared_cats)
    for cat in shared_cats:
        df[cat] = [len(set(d1[cat][:n]) & set(d2[cat][:n])) for n in nn]
    return df
# Per-category overlap of top-N neurons between the EWT and GUM rankings.
df = common_neurons(ordered_neurons_ewt, ordered_neurons_gum)
df
| Case | Definite | Degree | Gender | Mood | NumForm | NumType | Number | Person | PronType | Tense | VerbForm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 50 | 1 | 3 | 1 | 10 | 4 | 1 | 0 | 4 | 4 | 1 | 2 | 1 |
| 100 | 2 | 8 | 3 | 23 | 14 | 3 | 3 | 11 | 8 | 4 | 8 | 5 |
| 150 | 6 | 16 | 6 | 31 | 24 | 8 | 5 | 19 | 19 | 5 | 16 | 12 |
| 200 | 11 | 22 | 9 | 46 | 30 | 10 | 7 | 23 | 26 | 8 | 29 | 21 |
| 250 | 14 | 35 | 17 | 57 | 37 | 11 | 11 | 28 | 31 | 16 | 38 | 25 |
| 300 | 19 | 44 | 23 | 73 | 41 | 19 | 20 | 43 | 37 | 22 | 49 | 31 |
| 350 | 25 | 53 | 30 | 82 | 52 | 26 | 31 | 47 | 48 | 28 | 63 | 37 |
| 400 | 33 | 61 | 35 | 101 | 61 | 33 | 38 | 54 | 58 | 36 | 73 | 45 |
| 450 | 42 | 68 | 39 | 118 | 72 | 46 | 42 | 58 | 75 | 48 | 84 | 62 |
| 500 | 50 | 77 | 48 | 130 | 81 | 52 | 50 | 66 | 83 | 58 | 97 | 77 |
def visualise(df):
    """Draw one bar subplot per column of *df* (shared-neuron counts).

    Lays the plots out on a 4x3 grid (so expects at most 12 columns);
    the x axis is df.index — the top-N cutoffs.
    """
    n_rows, n_cols = 4, 3
    plt.figure(figsize=(10, 8))
    col_map = plt.get_cmap('Paired')
    # enumerate(..., start=1): subplot slots are 1-based.
    for slot, cat in enumerate(df.columns, start=1):
        plt.subplot(n_rows, n_cols, slot)
        plt.title(f'{cat}')
        plt.xlabel('N neurons')
        plt.bar(list(df.index), list(df[cat]), width=30, bottom=0,
                color=col_map.colors, edgecolor='k', linewidth=1)
    plt.tight_layout()
    plt.show()
# Bar charts of shared-neuron counts per category (EWT vs GUM).
visualise(df)
# Accuracy after keeping only / removing the top-ranked neurons.
accuracy(scores_keep_ewt)
accuracy(scores_remove_ewt)
accuracy(scores_keep_gum)
accuracy(scores_remove_gum)
bad_scores(scores_taiga)
NumType Ord train_score 0.456575682382134
NumType Sets train_score 0.02564102564102564
NumType train [{'__OVERALL__': 0.7259062776304156, 'Ord': 0.456575682382134, 'Sets': 0.02564102564102564, 'Frac': 0.53125, 'Card': 0.9632}]
---------------------
NumType Ord test_score 0.3333333333333333
NumType Sets test_score 0.0
NumType Frac test_score 0.18181818181818182
NumType test [{'__OVERALL__': 0.7245283018867924, 'Ord': 0.3333333333333333, 'Sets': 0.0, 'Frac': 0.18181818181818182, 'Card': 0.8826530612244898}]
---------------------
Case Dat train_score 0.42628205128205127
Case Par train_score 0.0
Case Gen train_score 0.25
Case train [{'__OVERALL__': 0.5581518852894317, 'Ins': 0.7756410256410257, 'Dat': 0.42628205128205127, 'Nom': 0.7051282051282052, 'Par': 0.0, 'Voc': 0.8571428571428571, 'Acc': 0.5576923076923077, 'Gen': 0.25, 'Loc': 0.6346153846153846}]
---------------------
Case __OVERALL__ test_score 0.3367003367003367
Case Dat test_score 0.21794871794871795
Case Nom test_score 0.4017857142857143
Case Par test_score 0.0
Case Voc test_score 0.0
Case Acc test_score 0.3392857142857143
Case Gen test_score 0.20535714285714285
Case Loc test_score 0.37755102040816324
Case test [{'__OVERALL__': 0.3367003367003367, 'Ins': 0.5063291139240507, 'Dat': 0.21794871794871795, 'Nom': 0.4017857142857143, 'Par': 0.0, 'Voc': 0.0, 'Acc': 0.3392857142857143, 'Gen': 0.20535714285714285, 'Loc': 0.37755102040816324}]
---------------------
Animacy Inan train_score 0.1864
Animacy train [{'__OVERALL__': 0.5864, 'Anim': 0.9864, 'Inan': 0.1864}]
---------------------
Animacy Inan test_score 0.1511111111111111
Animacy test [{'__OVERALL__': 0.5073349633251834, 'Anim': 0.9429347826086957, 'Inan': 0.1511111111111111}]
---------------------
VerbForm Part train_score 0.42964352720450283
VerbForm Conv train_score 0.045454545454545456
VerbForm train [{'__OVERALL__': 0.6620498614958449, 'Part': 0.42964352720450283, 'Conv': 0.045454545454545456, 'Fin': 0.8144, 'Inf': 0.7296}]
---------------------
VerbForm Part test_score 0.25
VerbForm Conv test_score 0.0
VerbForm test [{'__OVERALL__': 0.5566265060240964, 'Part': 0.25, 'Conv': 0.0, 'Fin': 0.64, 'Inf': 0.5347222222222222}]
---------------------
Person 2 train_score 0.4561824729891957
Person train [{'__OVERALL__': 0.6414565826330533, '1': 0.9387755102040817, '3': 0.5294117647058824, '2': 0.4561824729891957}]
---------------------
Person 3 test_score 0.41333333333333333
Person 2 test_score 0.4536082474226804
Person test [{'__OVERALL__': 0.5894206549118388, '1': 0.8533333333333334, '3': 0.41333333333333333, '2': 0.4536082474226804}]
---------------------
Voice Mid train_score 0.3877551020408163
Voice train [{'__OVERALL__': 0.5967818267865594, 'Mid': 0.3877551020408163, 'Pass': 0.7539149888143176, 'Act': 0.7214885954381752}]
---------------------
Voice Mid test_score 0.3157894736842105
Voice test [{'__OVERALL__': 0.573170731707317, 'Mid': 0.3157894736842105, 'Pass': 0.6, 'Act': 0.7}]
---------------------
Number Plur test_score 0.4519230769230769
Number test [{'__OVERALL__': 0.648960739030023, 'Plur': 0.4519230769230769, 'Sing': 0.8311111111111111}]
---------------------
NumForm Combi train_score 0.17543859649122806
NumForm train [{'__OVERALL__': 0.865351131746953, 'Digit': 0.9351740696278511, 'Word': 0.8427370948379351, 'Combi': 0.17543859649122806}]
---------------------
NumForm Combi test_score 0.0
NumForm test [{'__OVERALL__': 0.8321167883211679, 'Digit': 0.9075144508670521, 'Word': 0.7395833333333334, 'Combi': 0.0}]
---------------------
Mood Cnd train_score 0.4594594594594595
Mood train [{'__OVERALL__': 0.8436154949784792, 'Imp': 0.7597535934291582, 'Ind': 0.9267707082833133, 'Cnd': 0.4594594594594595}]
---------------------
Mood Cnd test_score 0.0
Mood Imp test_score 0.009708737864077669
Mood test [{'__OVERALL__': 0.6376811594202898, 'Cnd': 0.0, 'Ind': 0.8766666666666667, 'Imp': 0.009708737864077669}]
---------------------
NameType Pro train_score 0.23809523809523808
NameType Prs train_score 0.38823529411764707
NameType Sur train_score 0.3588039867109635
NameType Oth train_score 0.1
NameType Zoo train_score 0.0
NameType train [{'__OVERALL__': 0.5010060362173038, 'Com': 0.6987179487179487, 'Giv': 0.5737179487179487, 'Pro': 0.23809523809523808, 'Prs': 0.38823529411764707, 'Sur': 0.3588039867109635, 'Oth': 0.1, 'Zoo': 0.0, 'Geo': 0.5096153846153846}]
---------------------
NameType __OVERALL__ test_score 0.2358974358974359
NameType Com test_score 0.49019607843137253
NameType Giv test_score 0.37681159420289856
NameType Prs test_score 0.0
NameType Pro test_score 0.0
NameType Sur test_score 0.21568627450980393
NameType Oth test_score 0.0
NameType Zoo test_score 0.0
NameType Geo test_score 0.3225806451612903
NameType test [{'__OVERALL__': 0.2358974358974359, 'Com': 0.49019607843137253, 'Giv': 0.37681159420289856, 'Prs': 0.0, 'Pro': 0.0, 'Sur': 0.21568627450980393, 'Oth': 0.0, 'Zoo': 0.0, 'Geo': 0.3225806451612903}]
---------------------
Gender Fem train_score 0.48739495798319327
Gender train [{'__OVERALL__': 0.6442577030812325, 'Fem': 0.48739495798319327, 'Masc': 0.6146458583433373, 'Neut': 0.8307322929171669}]
---------------------
Gender Fem test_score 0.49
Gender Masc test_score 0.43666666666666665
Gender test [{'__OVERALL__': 0.5022222222222222, 'Fem': 0.49, 'Masc': 0.43666666666666665, 'Neut': 0.58}]
---------------------
Degree __OVERALL__ test_score 0.4888888888888889
Degree Pos test_score 0.44333333333333336
Degree test [{'__OVERALL__': 0.4888888888888889, 'Sup': 0.5, 'Cmp': 0.75, 'Pos': 0.44333333333333336}]
---------------------
PronType Exc train_score 0.48
PronType Rel train_score 0.44
PronType Neg train_score 0.4095744680851064
PronType Rcp train_score 0.0
PronType Emp train_score 0.4642857142857143
PronType Dem train_score 0.46
PronType Prs train_score 0.124
PronType train [{'__OVERALL__': 0.5411140583554377, 'Exc': 0.48, 'Rel': 0.44, 'Int': 0.9, 'Neg': 0.4095744680851064, 'Tot': 0.704, 'Rcp': 0.0, 'Emp': 0.4642857142857143, 'Dem': 0.46, 'Prs': 0.124, 'Ind': 0.784}]
---------------------
PronType __OVERALL__ test_score 0.3274336283185841
PronType Exc test_score 0.3333333333333333
PronType Rel test_score 0.09836065573770492
PronType Neg test_score 0.47368421052631576
PronType Tot test_score 0.45555555555555555
PronType Rcp test_score 0.0
PronType Emp test_score 0.1
PronType Dem test_score 0.3111111111111111
PronType Prs test_score 0.17777777777777778
PronType Ind test_score 0.39285714285714285
PronType test [{'__OVERALL__': 0.3274336283185841, 'Exc': 0.3333333333333333, 'Rel': 0.09836065573770492, 'Int': 0.6274509803921569, 'Neg': 0.47368421052631576, 'Tot': 0.45555555555555555, 'Rcp': 0.0, 'Emp': 0.1, 'Dem': 0.3111111111111111, 'Prs': 0.17777777777777778, 'Ind': 0.39285714285714285}]
---------------------
Tense Pres train_score 0.3217286914765906
Tense Fut train_score 0.3789004457652303
Tense train [{'__OVERALL__': 0.5656263360410432, 'Past': 0.9603841536614646, 'Pres': 0.3217286914765906, 'Fut': 0.3789004457652303}]
---------------------
Tense __OVERALL__ test_score 0.49137931034482757
Tense Pres test_score 0.19333333333333333
Tense Fut test_score 0.3020833333333333
Tense test [{'__OVERALL__': 0.49137931034482757, 'Past': 0.85, 'Pres': 0.19333333333333333, 'Fut': 0.3020833333333333}]
---------------------
# Overview plots and selectivity computation for Taiga.
accuracy_plot(scores_taiga, size_taiga)
accuracy(scores_c_taiga)
selectivity = check_selectivity(scores_taiga) - check_selectivity(scores_c_taiga)
dict(zip(list(scores_taiga.keys()), selectivity))
{'NumType': 0.1132075471698113,
'Aspect': 0.012373453318335281,
'Case': 0.2154882154882155,
'Animacy': -0.0317848410757946,
'VerbForm': 0.09879518072289156,
'Person': 0.2682619647355164,
'Voice': 0.3414634146341463,
'Number': 0.13279445727482675,
'NumForm': 0.39051094890510946,
'Mood': 0.060386473429951626,
'NameType': 0.11282051282051281,
'Gender': 0.20111111111111113,
'Degree': -0.27499999999999997,
'PronType': 0.15707964601769914,
'Tense': 0.2183908045977011}
common_neurons(ordered_neurons_taiga, ordered_neurons_gum)
| Case | Degree | Gender | Mood | NumForm | NumType | Number | Person | PronType | Tense | VerbForm | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 50 | 0 | 0 | 1 | 1 | 0 | 2 | 3 | 0 | 1 | 2 | 0 |
| 100 | 1 | 5 | 1 | 4 | 1 | 3 | 3 | 0 | 3 | 2 | 1 |
| 150 | 2 | 5 | 2 | 10 | 3 | 6 | 3 | 1 | 6 | 4 | 3 |
| 200 | 2 | 5 | 2 | 12 | 5 | 12 | 4 | 3 | 9 | 4 | 8 |
| 250 | 3 | 7 | 8 | 15 | 11 | 18 | 8 | 7 | 18 | 13 | 11 |
| 300 | 4 | 9 | 10 | 18 | 13 | 21 | 9 | 10 | 22 | 14 | 14 |
| 350 | 7 | 13 | 15 | 24 | 17 | 23 | 14 | 15 | 28 | 17 | 19 |
| 400 | 14 | 17 | 20 | 27 | 21 | 27 | 16 | 20 | 29 | 21 | 27 |
| 450 | 23 | 24 | 28 | 34 | 25 | 31 | 19 | 26 | 31 | 28 | 34 |
| 500 | 29 | 36 | 36 | 36 | 35 | 33 | 26 | 32 | 36 | 33 | 40 |
common_neurons(ordered_neurons_taiga, ordered_neurons_ewt)
| Case | Degree | Gender | Mood | NumForm | NumType | Number | Person | PronType | Tense | VerbForm | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 50 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 |
| 100 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 2 | 3 | 2 | 0 |
| 150 | 4 | 1 | 6 | 1 | 3 | 1 | 3 | 7 | 5 | 3 | 2 |
| 200 | 6 | 4 | 11 | 6 | 5 | 2 | 4 | 7 | 7 | 7 | 6 |
| 250 | 7 | 8 | 15 | 10 | 7 | 3 | 5 | 12 | 10 | 12 | 8 |
| 300 | 12 | 11 | 23 | 15 | 12 | 8 | 7 | 15 | 13 | 17 | 11 |
| 350 | 16 | 17 | 28 | 21 | 17 | 11 | 14 | 20 | 17 | 25 | 15 |
| 400 | 18 | 22 | 34 | 27 | 23 | 13 | 21 | 28 | 20 | 29 | 20 |
| 450 | 21 | 29 | 40 | 37 | 27 | 17 | 26 | 30 | 25 | 35 | 28 |
| 500 | 26 | 41 | 48 | 43 | 38 | 25 | 31 | 32 | 30 | 40 | 32 |
common_neurons(ordered_neurons_ewt, ordered_neurons_gum)
| Case | Definite | Degree | Gender | Mood | NumForm | NumType | Number | Person | PronType | Tense | VerbForm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 50 | 1 | 3 | 1 | 10 | 4 | 1 | 0 | 4 | 4 | 1 | 2 | 1 |
| 100 | 2 | 8 | 3 | 23 | 14 | 3 | 3 | 11 | 8 | 4 | 8 | 5 |
| 150 | 6 | 16 | 6 | 31 | 24 | 8 | 5 | 19 | 19 | 5 | 16 | 12 |
| 200 | 11 | 22 | 9 | 46 | 30 | 10 | 7 | 23 | 26 | 8 | 29 | 21 |
| 250 | 14 | 35 | 17 | 57 | 37 | 11 | 11 | 28 | 31 | 16 | 38 | 25 |
| 300 | 19 | 44 | 23 | 73 | 41 | 19 | 20 | 43 | 37 | 22 | 49 | 31 |
| 350 | 25 | 53 | 30 | 82 | 52 | 26 | 31 | 47 | 48 | 28 | 63 | 37 |
| 400 | 33 | 61 | 35 | 101 | 61 | 33 | 38 | 54 | 58 | 36 | 73 | 45 |
| 450 | 42 | 68 | 39 | 118 | 72 | 46 | 42 | 58 | 75 | 48 | 84 | 62 |
| 500 | 50 | 77 | 48 | 130 | 81 | 52 | 50 | 66 | 83 | 58 | 97 | 77 |